Hotel_Booking_EDA_Analysis¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
df = pd.read_csv("hotel_bookings.csv")
df.head(5)
Out[2]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit NaN NaN 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit NaN NaN 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit NaN NaN 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 NaN 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 NaN 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [3]:
df.shape
Out[3]:
(119390, 32)
In [4]:
df.columns
Out[4]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [5]:
df.isnull().values.any()
Out[5]:
True
In [6]:
df.isnull().sum()
Out[6]:
hotel                                  0
is_canceled                            0
lead_time                              0
arrival_date_year                      0
arrival_date_month                     0
arrival_date_week_number               0
arrival_date_day_of_month              0
stays_in_weekend_nights                0
stays_in_week_nights                   0
adults                                 0
children                               4
babies                                 0
meal                                   0
country                              488
market_segment                         0
distribution_channel                   0
is_repeated_guest                      0
previous_cancellations                 0
previous_bookings_not_canceled         0
reserved_room_type                     0
assigned_room_type                     0
booking_changes                        0
deposit_type                           0
agent                              16340
company                           112593
days_in_waiting_list                   0
customer_type                          0
adr                                    0
required_car_parking_spaces            0
total_of_special_requests              0
reservation_status                     0
reservation_status_date                0
dtype: int64
In [7]:
df.fillna(0, inplace = True)
In [8]:
df.isnull().sum()
Out[8]:
hotel                             0
is_canceled                       0
lead_time                         0
arrival_date_year                 0
arrival_date_month                0
arrival_date_week_number          0
arrival_date_day_of_month         0
stays_in_weekend_nights           0
stays_in_week_nights              0
adults                            0
children                          0
babies                            0
meal                              0
country                           0
market_segment                    0
distribution_channel              0
is_repeated_guest                 0
previous_cancellations            0
previous_bookings_not_canceled    0
reserved_room_type                0
assigned_room_type                0
booking_changes                   0
deposit_type                      0
agent                             0
company                           0
days_in_waiting_list              0
customer_type                     0
adr                               0
required_car_parking_spaces       0
total_of_special_requests         0
reservation_status                0
reservation_status_date           0
dtype: int64
In [9]:
df['meal'].value_counts()
Out[9]:
meal
BB           92310
HB           14463
SC           10650
Undefined     1169
FB             798
Name: count, dtype: int64
In [10]:
df['children'].value_counts()
Out[10]:
children
0.0     110800
1.0       4861
2.0       3652
3.0         76
10.0         1
Name: count, dtype: int64
In [11]:
df['adults'].unique()
Out[11]:
array([ 2,  1,  3,  4, 40, 26, 50, 27, 55,  0, 20,  6,  5, 10],
      dtype=int64)
In [12]:
df['babies'].unique()
Out[12]:
array([ 0,  1,  2, 10,  9], dtype=int64)
In [13]:
len(df[df['adults'] == 0])
Out[13]:
403
In [14]:
len(df[df['babies'] == 0])
Out[14]:
118473
In [15]:
filter = (df['children'] == 0) & (df['adults'] == 0) & (df['babies'] == 0)
df[filter]
Out[15]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
2224 Resort Hotel 0 1 2015 October 41 6 0 3 0 ... No Deposit 0.0 174.0 0 Transient-Party 0.00 0 0 Check-Out 10/6/2015
2409 Resort Hotel 0 0 2015 October 42 12 0 0 0 ... No Deposit 0.0 174.0 0 Transient 0.00 0 0 Check-Out 10/12/2015
3181 Resort Hotel 0 36 2015 November 47 20 1 2 0 ... No Deposit 38.0 0.0 0 Transient-Party 0.00 0 0 Check-Out 11/23/2015
3684 Resort Hotel 0 165 2015 December 53 30 1 4 0 ... No Deposit 308.0 0.0 122 Transient-Party 0.00 0 0 Check-Out 1/4/2016
3708 Resort Hotel 0 165 2015 December 53 30 2 4 0 ... No Deposit 308.0 0.0 122 Transient-Party 0.00 0 0 Check-Out 1/5/2016
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
115029 City Hotel 0 107 2017 June 26 27 0 3 0 ... No Deposit 7.0 0.0 0 Transient 100.80 0 0 Check-Out 6/30/2017
115091 City Hotel 0 1 2017 June 26 30 0 1 0 ... No Deposit 0.0 0.0 0 Transient 0.00 1 1 Check-Out 7/1/2017
116251 City Hotel 0 44 2017 July 28 15 1 1 0 ... No Deposit 425.0 0.0 0 Transient 73.80 0 0 Check-Out 7/17/2017
116534 City Hotel 0 2 2017 July 28 15 2 5 0 ... No Deposit 9.0 0.0 0 Transient-Party 22.86 0 1 Check-Out 7/22/2017
117087 City Hotel 0 170 2017 July 30 27 0 2 0 ... No Deposit 52.0 0.0 0 Transient 0.00 0 0 Check-Out 7/29/2017

180 rows × 32 columns

In [16]:
data = df[~filter]
In [17]:
data.shape
Out[17]:
(119210, 32)
In [18]:
df['hotel'].unique()
Out[18]:
array(['Resort Hotel', 'City Hotel'], dtype=object)
In [19]:
df.head(10)
Out[19]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015
5 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015
6 Resort Hotel 0 0 2015 July 27 1 0 2 2 ... No Deposit 0.0 0.0 0 Transient 107.0 0 0 Check-Out 7/3/2015
7 Resort Hotel 0 9 2015 July 27 1 0 2 2 ... No Deposit 303.0 0.0 0 Transient 103.0 0 1 Check-Out 7/3/2015
8 Resort Hotel 1 85 2015 July 27 1 0 3 2 ... No Deposit 240.0 0.0 0 Transient 82.0 0 1 Canceled 5/6/2015
9 Resort Hotel 1 75 2015 July 27 1 0 3 2 ... No Deposit 15.0 0.0 0 Transient 105.5 0 0 Canceled 4/22/2015

10 rows × 32 columns

Perform Spatial Analysis on Guests Home-Town¶

    1. From where do the guests come from & perform Spatial Analysis too
In [20]:
data.head(2)
Out[20]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015

2 rows × 32 columns

In [21]:
data['hotel'].unique()
Out[21]:
array(['Resort Hotel', 'City Hotel'], dtype=object)
In [22]:
data['is_canceled'].unique()
Out[22]:
array([0, 1], dtype=int64)
In [23]:
resort = data[(data['hotel'] == 'Resort Hotel') & (data['is_canceled'] == 0)]
City = data[(data['hotel'] == 'City Hotel') & (data['is_canceled'] == 0)]
In [24]:
resort.shape
Out[24]:
(28927, 32)
In [25]:
City.shape
Out[25]:
(46084, 32)
In [26]:
resort['country'].value_counts()
Out[26]:
country
PRT    10184
GBR     5922
ESP     3105
IRL     1734
FRA     1399
       ...  
UGA        1
COM        1
MUS        1
BIH        1
SAU        1
Name: count, Length: 119, dtype: int64
In [27]:
resort['country'].value_counts().index
Out[27]:
Index(['PRT', 'GBR', 'ESP', 'IRL', 'FRA', 'DEU',  'CN', 'NLD',     0, 'USA',
       ...
       'MKD', 'SMR', 'BDI', 'SYR', 'CYM', 'UGA', 'COM', 'MUS', 'BIH', 'SAU'],
      dtype='object', name='country', length=119)
In [28]:
lables = resort['country'].value_counts().index
values = resort['country'].value_counts()
In [29]:
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.express as px
In [30]:
trace=go.Pie(labels=lables, values=values,
               hoverinfo='label+percent', textinfo='value'
               )
In [31]:
iplot([trace])
In [32]:
data.columns
Out[32]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [33]:
country_wise = data[data['is_canceled'] == 0]['country'].value_counts().reset_index()
country_wise.columns = ['country', 'No of guests']
country_wise
Out[33]:
country No of guests
0 PRT 20977
1 GBR 9668
2 FRA 8468
3 ESP 6383
4 DEU 6067
... ... ...
161 BHR 1
162 DJI 1
163 MLI 1
164 NPL 1
165 FRO 1

166 rows × 2 columns

In [34]:
!pip install folium
Requirement already satisfied: folium in c:\users\abhis\anaconda3\lib\site-packages (0.17.0)
Requirement already satisfied: branca>=0.6.0 in c:\users\abhis\anaconda3\lib\site-packages (from folium) (0.7.2)
Requirement already satisfied: jinja2>=2.9 in c:\users\abhis\anaconda3\lib\site-packages (from folium) (3.1.3)
Requirement already satisfied: numpy in c:\users\abhis\anaconda3\lib\site-packages (from folium) (1.26.4)
Requirement already satisfied: requests in c:\users\abhis\anaconda3\lib\site-packages (from folium) (2.31.0)
Requirement already satisfied: xyzservices in c:\users\abhis\anaconda3\lib\site-packages (from folium) (2022.9.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\abhis\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (2.1.3)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\abhis\anaconda3\lib\site-packages (from requests->folium) (2024.6.2)
In [35]:
# map graph
import folium
from folium.plugins import HeatMap
In [36]:
b = folium.Map()
In [37]:
country_wise.dtypes
Out[37]:
country         object
No of guests     int64
dtype: object
In [38]:
gueats = px.choropleth(country_wise, 
                      locations = country_wise['country'],
                      color = country_wise['No of guests'],
                      hover_name = country_wise['country'],
                       title = "Home country of gueats"
                      )
In [39]:
gueats.show()
In [40]:
!pip install sort-dataframeby-monthorweek
Requirement already satisfied: sort-dataframeby-monthorweek in c:\users\abhis\anaconda3\lib\site-packages (0.4)
In [41]:
!pip install sorted-months-weekdays
Requirement already satisfied: sorted-months-weekdays in c:\users\abhis\anaconda3\lib\site-packages (0.2)

2. How much do guests pay for a room per night?¶

3. How does the price per night vary over the year?¶

In [42]:
data.head()
Out[42]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [43]:
# How much do guests pay for a room per night?
data2 = data[data['is_canceled'] == 0]
In [44]:
data2.columns
Out[44]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [45]:
plt.figure(figsize=(12, 8))
sns.boxplot(x = 'reserved_room_type',
           y = 'adr', 
           hue = 'hotel',
           data = data2)
plt.title("Price of room types per night and person")
plt.xlabel("Room Type")
plt.ylabel("Price [EUR]")
plt.legend(loc = 'upper right')
plt.ylim(0, 600)
plt.show()
In [46]:
# How does the price per night vary over the year?
In [47]:
data_resort = resort[resort['is_canceled'] == 0]
In [48]:
data_city = City[City['is_canceled'] == 0]
In [49]:
data_city
Out[49]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
40060 City Hotel 0 6 2015 July 27 1 0 2 1 ... No Deposit 6.0 0.0 0 Transient 0.00 0 0 Check-Out 7/3/2015
40066 City Hotel 0 3 2015 July 27 2 0 3 1 ... No Deposit 1.0 0.0 0 Transient-Party 58.67 0 0 Check-Out 7/5/2015
40070 City Hotel 0 43 2015 July 27 3 0 2 2 ... No Deposit 1.0 0.0 0 Transient-Party 86.00 0 0 Check-Out 7/5/2015
40071 City Hotel 0 43 2015 July 27 3 0 2 2 ... No Deposit 1.0 0.0 0 Transient-Party 43.00 0 0 Check-Out 7/5/2015
40072 City Hotel 0 43 2015 July 27 3 0 2 2 ... No Deposit 1.0 0.0 0 Transient-Party 86.00 0 0 Check-Out 7/5/2015
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
119385 City Hotel 0 23 2017 August 35 30 2 5 2 ... No Deposit 394.0 0.0 0 Transient 96.14 0 0 Check-Out 9/6/2017
119386 City Hotel 0 102 2017 August 35 31 2 5 3 ... No Deposit 9.0 0.0 0 Transient 225.43 0 2 Check-Out 9/7/2017
119387 City Hotel 0 34 2017 August 35 31 2 5 2 ... No Deposit 9.0 0.0 0 Transient 157.71 0 4 Check-Out 9/7/2017
119388 City Hotel 0 109 2017 August 35 31 2 5 2 ... No Deposit 89.0 0.0 0 Transient 104.40 0 0 Check-Out 9/7/2017
119389 City Hotel 0 205 2017 August 35 29 2 7 2 ... No Deposit 9.0 0.0 0 Transient 151.20 0 2 Check-Out 9/7/2017

46084 rows × 32 columns

In [50]:
data_resort
Out[50]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.00 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.00 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.00 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.00 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.00 0 1 Check-Out 7/3/2015
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
40055 Resort Hotel 0 212 2017 August 35 31 2 8 2 ... No Deposit 143.0 0.0 0 Transient 89.75 0 0 Check-Out 9/10/2017
40056 Resort Hotel 0 169 2017 August 35 30 2 9 2 ... No Deposit 250.0 0.0 0 Transient-Party 202.27 0 1 Check-Out 9/10/2017
40057 Resort Hotel 0 204 2017 August 35 29 4 10 2 ... No Deposit 250.0 0.0 0 Transient 153.57 0 3 Check-Out 9/12/2017
40058 Resort Hotel 0 211 2017 August 35 31 4 10 2 ... No Deposit 40.0 0.0 0 Contract 112.80 0 1 Check-Out 9/14/2017
40059 Resort Hotel 0 161 2017 August 35 31 4 10 2 ... No Deposit 69.0 0.0 0 Transient 99.06 0 0 Check-Out 9/14/2017

28927 rows × 32 columns

In [51]:
resort_hotel = data_resort.groupby(['arrival_date_month'])['adr'].mean().reset_index()
resort_hotel
Out[51]:
arrival_date_month adr
0 April 75.867816
1 August 181.205892
2 December 68.410104
3 February 54.147478
4 January 48.761125
5 July 150.122528
6 June 107.974850
7 March 57.056838
8 May 76.657558
9 November 48.706289
10 October 61.775449
11 September 96.416860
In [52]:
city_hotel = data_city.groupby(['arrival_date_month'])['adr'].mean().reset_index()
city_hotel
Out[52]:
arrival_date_month adr
0 April 111.962267
1 August 118.674598
2 December 88.401855
3 February 86.520062
4 January 82.330983
5 July 115.818019
6 June 117.874360
7 March 90.658533
8 May 120.669827
9 November 86.946592
10 October 102.004672
11 September 112.776582
In [53]:
final=resort_hotel.merge(city_hotel,on='arrival_date_month')
final.columns=['month','price_for_resort','price_for_city_hotel']
final
Out[53]:
month price_for_resort price_for_city_hotel
0 April 75.867816 111.962267
1 August 181.205892 118.674598
2 December 68.410104 88.401855
3 February 54.147478 86.520062
4 January 48.761125 82.330983
5 July 150.122528 115.818019
6 June 107.974850 117.874360
7 March 57.056838 90.658533
8 May 76.657558 120.669827
9 November 48.706289 86.946592
10 October 61.775449 102.004672
11 September 96.416860 112.776582
In [54]:
import sort_dataframeby_monthorweek as sd
In [55]:
final = sd.Sort_Dataframeby_Month(df = final, monthcolumnname = 'month')
final
Out[55]:
month price_for_resort price_for_city_hotel
0 January 48.761125 82.330983
1 February 54.147478 86.520062
2 March 57.056838 90.658533
3 April 75.867816 111.962267
4 May 76.657558 120.669827
5 June 107.974850 117.874360
6 July 150.122528 115.818019
7 August 181.205892 118.674598
8 September 96.416860 112.776582
9 October 61.775449 102.004672
10 November 48.706289 86.946592
11 December 68.410104 88.401855
In [56]:
px.line(final, x = 'month', y=['price_for_resort', 'price_for_city_hotel'], title ='Room price per night over the month' )

4. Distribution of Nights Spent at Hotels by Market Segment and Hotel Type¶

5. Analysing Preference of Guests, what they basically Prefer in meal?¶

In [57]:
data.columns
Out[57]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [58]:
data['market_segment'].value_counts()
Out[58]:
market_segment
Online TA        56408
Offline TA/TO    24182
Groups           19791
Direct           12582
Corporate         5282
Complementary      728
Aviation           235
Undefined            2
Name: count, dtype: int64
In [59]:
plt.figure(figsize = (8,8))
sns.boxplot(x = 'market_segment', 
           y = 'stays_in_weekend_nights', 
           hue = 'hotel',
           data = data)
Out[59]:
<Axes: xlabel='market_segment', ylabel='stays_in_weekend_nights'>
In [60]:
# Analysing Preference of Guests, what they basically Prefer in meal?
In [61]:
data['meal'].value_counts()
Out[61]:
meal
BB           92236
HB           14458
SC           10549
Undefined     1169
FB             798
Name: count, dtype: int64
In [62]:
px.pie(data, names = data['meal'].value_counts().index,
      values = data['meal'].value_counts().values, hole = 0.5)

5. Analyse Special Request done by Cuustomers¶

6. Which are the most busy month or in which months Guests are high?¶

In [63]:
data.groupby(['hotel', 'meal']).agg({'meal':'count'}).unstack()
Out[63]:
meal
meal BB FB HB SC Undefined
hotel
City Hotel 62233.0 44.0 6412.0 10474.0 NaN
Resort Hotel 30003.0 754.0 8046.0 75.0 1169.0
In [64]:
# Analyse Special Request done by Cuustomers
In [65]:
data.columns
Out[65]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [66]:
sns.countplot(x = 'total_of_special_requests', data = data)
plt.title("Total Special Request")
Out[66]:
Text(0.5, 1.0, 'Total Special Request')
In [67]:
sns.countplot(x = 'total_of_special_requests', data = data, hue = 'hotel')
plt.title("Total Special Request")
Out[67]:
Text(0.5, 1.0, 'Total Special Request')
In [68]:
data.groupby(['total_of_special_requests',
             'is_canceled']).agg({'total_of_special_requests':'count'})
Out[68]:
total_of_special_requests
total_of_special_requests is_canceled
0 0 36667
1 33534
1 0 25867
1 7316
2 0 10086
1 2866
3 0 2049
1 445
4 0 304
1 36
5 0 38
1 2
In [69]:
pivot = data.groupby(['total_of_special_requests',
             'is_canceled']).agg({'total_of_special_requests':'count'}).rename(columns = {'total_of_special_requests':'count'}).unstack()
pivot
Out[69]:
count
is_canceled 0 1
total_of_special_requests
0 36667 33534
1 25867 7316
2 10086 2866
3 2049 445
4 304 36
5 38 2
In [70]:
pivot.plot(kind= 'bar')
Out[70]:
<Axes: xlabel='total_of_special_requests'>
In [71]:
# Which are the most busy month or in which months Guests are high?
In [72]:
data_resort.head()
Out[72]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [73]:
data_resort.columns
Out[73]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [74]:
rush_resort=data_resort['arrival_date_month'].value_counts().reset_index()
rush_resort.columns=['month','no of guests']
rush_resort
Out[74]:
month no of guests
0 August 3257
1 July 3137
2 October 2575
3 March 2571
4 April 2550
5 May 2535
6 February 2308
7 September 2102
8 June 2037
9 December 2014
10 November 1975
11 January 1866
In [75]:
rush_city=data_city['arrival_date_month'].value_counts().reset_index()
rush_city.columns=['month','no of guests']
rush_city
Out[75]:
month no of guests
0 August 5367
1 July 4770
2 May 4568
3 June 4358
4 October 4326
5 September 4283
6 March 4049
7 April 4010
8 February 3051
9 November 2676
10 December 2377
11 January 2249
In [76]:
final_rush = rush_resort.merge(rush_city, on = 'month')
final_rush.columns = ['month', 'no of guests in resort', 'no of guests in city']
final_rush
Out[76]:
month no of guests in resort no of guests in city
0 August 3257 5367
1 July 3137 4770
2 October 2575 4326
3 March 2571 4049
4 April 2550 4010
5 May 2535 4568
6 February 2308 3051
7 September 2102 4283
8 June 2037 4358
9 December 2014 2377
10 November 1975 2676
11 January 1866 2249
In [77]:
final_rush = sd.Sort_Dataframeby_Month(df =final_rush, monthcolumnname = 'month')
final_rush
Out[77]:
month no of guests in resort no of guests in city
0 January 1866 2249
1 February 2308 3051
2 March 2571 4049
3 April 2550 4010
4 May 2535 4568
5 June 2037 4358
6 July 3137 4770
7 August 3257 5367
8 September 2102 4283
9 October 2575 4326
10 November 1975 2676
11 December 2014 2377
In [78]:
final_rush.dtypes
Out[78]:
month                     object
no of guests in resort     int64
no of guests in city       int64
dtype: object
In [79]:
px.line(data_frame =final_rush, x = 'month', 
       y = ['no of guests in resort', 'no of guests in city'],
       title = 'Total no og gauest per months')

6. How long do people stay at the hotels?¶

7. Bookings by market segment¶

In [80]:
data.head()
Out[80]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [81]:
filter = data['is_canceled'] == 0
clean_data = data[filter]
In [82]:
clean_data.head()
Out[82]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [83]:
clean_data.columns
Out[83]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [84]:
clean_data['total_nights_spend'] = clean_data['stays_in_weekend_nights'] + clean_data['stays_in_week_nights']
C:\Users\abhis\AppData\Local\Temp\ipykernel_10072\1100290991.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [85]:
clean_data.head()
Out[85]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date total_nights_spend
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015 0
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015 0
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015 1
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015 1
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015 2

5 rows × 33 columns

In [86]:
stay = clean_data.groupby(['total_nights_spend', 'hotel']).agg('count').reset_index()
stay = stay.iloc[:, 0:3]
stay.head()
Out[86]:
total_nights_spend hotel is_canceled
0 0 City Hotel 251
1 0 Resort Hotel 371
2 1 City Hotel 9155
3 1 Resort Hotel 6579
4 2 City Hotel 10983
In [87]:
stay = stay.rename(columns = {'is_canceled': 'Number of stay'})
stay.head()
Out[87]:
total_nights_spend hotel Number of stay
0 0 City Hotel 251
1 0 Resort Hotel 371
2 1 City Hotel 9155
3 1 Resort Hotel 6579
4 2 City Hotel 10983
In [88]:
plt.figure(figsize=(20, 8))
sns.barplot(x = 'total_nights_spend', y = 'Number of stay', hue = 'hotel',
           hue_order = ['City Hotel', 'Resort Hotel'], data = stay)
Out[88]:
<Axes: xlabel='total_nights_spend', ylabel='Number of stay'>
In [89]:
# Bookings by market segment
In [90]:
clean_data['market_segment'].value_counts()
Out[90]:
market_segment
Online TA        35673
Offline TA/TO    15880
Direct           10648
Groups            7697
Corporate         4291
Complementary      639
Aviation           183
Name: count, dtype: int64
In [91]:
fig = px.pie(clean_data,
            values = clean_data['market_segment'].value_counts().values,
            names = clean_data['market_segment'].value_counts().index,
            title = "Booking per market segment")
fig.update_traces(rotation=-90, textinfo="percent+label")
fig.show()

Assignment¶

1. How many bookings were cancelled?¶

2. Which month have the highest number of cancellations?¶

In [92]:
total_cancellations = df['is_canceled'].sum()
print(total_cancellations)
44224
In [93]:
cancellations_by_month = df[df['is_canceled'] == 1].groupby('arrival_date_month').size()
month_with_most_cancellations = cancellations_by_month.idxmax()
print(month_with_most_cancellations)
August